{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.seed(666)\n",
    "X = np.random.random(size=(1000, 10))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "true_theta = np.arange(1,12,dtype=float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_b = np.hstack([np.ones((len(X),1)), X])\n",
    "y = X_b.dot(true_theta) + np.random.normal(size=1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1000, 11)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_b.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1.        , 0.70043712, 0.84418664, 0.67651434, 0.72785806,\n",
       "       0.95145796, 0.0127032 , 0.4135877 , 0.04881279, 0.09992856,\n",
       "       0.50806631])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_b[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def J(theta, X_b, y):\n",
    "    try:\n",
    "        return np.sum((y - (X_b.dot(theta)))**2) / len(X_b)\n",
    "    except:\n",
    "        return float(\"inf\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 求导\n",
    "def dJ_math(theta, X_b, y):\n",
    "    return X_b.T.dot(X_b.dot(theta) - y ) * 2. / len(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def dJ_debug(theta, X_b, y, epsilon=0.01):\n",
    "    res = np.empty(len(theta))\n",
    "    for i in range(len(theta)):\n",
    "        theta_1 = theta.copy()\n",
    "        theta_1[i] += epsilon\n",
    "        theta_2 = theta.copy()\n",
    "        theta_2[i] -= epsilon\n",
    "        res[i] = (J(theta_1, X_b, y) - J(theta_2, X_b, y)) / (2. * epsilon)\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# eta为步长\n",
    "def gradient_descent(dJ, X_b, y, initial_theta, eta, n_iters= 1e4, epsilon=1e-8):\n",
    "    theta = initial_theta\n",
    "    cur_iter = 0\n",
    "    \n",
    "    while cur_iter < n_iters:\n",
    "        gradient = dJ(theta, X_b, y)\n",
    "        last_theta = theta\n",
    "        theta = theta - eta * gradient\n",
    "        \n",
    "        # 判断是否本次梯度下降的值足够小，也就是判断是否已经拟合\n",
    "        if(abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon):\n",
    "            break\n",
    "            \n",
    "        cur_iter += 1\n",
    "    return theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.8 s, sys: 0 ns, total: 3.8 s\n",
      "Wall time: 3.8 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([ 1.1251597 ,  2.05312521,  2.91522497,  4.11895968,  5.05002117,\n",
       "        5.90494046,  6.97383745,  8.00088367,  8.86213468,  9.98608331,\n",
       "       10.90529198])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_b_1 = np.hstack([np.ones((len(X), 1)), X])\n",
    "initial_theta = np.zeros(X_b_1.shape[1])\n",
    "eta = 0.01\n",
    "\n",
    "%time theta = gradient_descent(dJ_debug, X_b_1, y, initial_theta, eta)\n",
    "theta"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 小批量梯度下降法的实现"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "# 确定随机数种子\n",
    "np.random.seed(0)\n",
    "\n",
    "# 损失函数\n",
    "def J(theta, X_b, y):\n",
    "    try:\n",
    "        return np.sum((y - (X_b.dot(theta)))**2) / len(X_b)\n",
    "    except:\n",
    "        return float(\"inf\")\n",
    "\n",
    "# 求导\n",
    "def dJ_math(theta, X_b, y):\n",
    "    return X_b.T.dot(X_b.dot(theta) - y ) * 2. / len(y)\n",
    "\n",
    "\n",
    "# 小批量梯度下降法\n",
    "def MBGD(X_b, y,  initial_theta, eta, k=10,epsilon=1e-8, n_iters=1e4):\n",
    "    \n",
    "    # 设置初始化的theta等于0\n",
    "    theta = initial_theta\n",
    "    \n",
    "    # 将数据集索引打乱\n",
    "    reset_index = np.random.permutation(len(X_b))\n",
    "    \n",
    "    cur_iter = 0\n",
    "    length = len(X_b)\n",
    "    \n",
    "    while cur_iter < n_iters:\n",
    "        for i in range(k):\n",
    "            # 将乱序索引列表分割成等分\n",
    "            one_list = reset_index[math.floor(i / k * length):math.floor((i + 1) / k * length)]\n",
    "#             one_list = range(len(X_b))[math.floor(i / k * length):math.floor((i + 1) / k * length)]\n",
    "    \n",
    "            new_X_b = X_b[one_list]\n",
    "            new_y = y[one_list]\n",
    "            \n",
    "            gradient = dJ_math(theta, new_X_b, new_y)\n",
    "            last_theta = theta \n",
    "            \n",
    "            theta = theta - eta * gradient\n",
    "\n",
    "            if J(last_theta, X_b, y) - J(theta, X_b, y) < epsilon:\n",
    "                break\n",
    "            break\n",
    "            \n",
    "        cur_iter += 1  \n",
    "\n",
    "    return theta\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 1.61382204,  1.96788628,  2.84324104,  3.81094369,  4.95497256,\n",
       "        5.93396518,  6.85894672,  7.73455626,  9.07649865,  9.99545691,\n",
       "       10.70118963])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_b = np.hstack([np.ones((len(X), 1)), X])\n",
    "initial_theta = np.zeros(X_b.shape[1])\n",
    "eta = 0.01\n",
    "\n",
    "theta = MBGD(X_b, y, initial_theta,eta,k=5)\n",
    "theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2 s, sys: 8.11 ms, total: 2 s\n",
      "Wall time: 502 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([ 1.1251597 ,  2.05312521,  2.91522497,  4.11895968,  5.05002117,\n",
       "        5.90494046,  6.97383745,  8.00088367,  8.86213468,  9.98608331,\n",
       "       10.90529198])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time gradient_descent(dJ_math, X_b_1, y, initial_theta, eta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
